Observed log-chlorophyll at representative station in SF Bay Delta region.
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')
# flow data, left moving window average of 30 days
data(flow_dat)
fl_dat <- flow_dat %>%
rename(date = Date) %>%
filter(station %in% 'sjr') %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sf_dat)
sf_sub <- sf_dat %>%
filter(Site_Code %in% 'P8') %>%
rename(date = Date) %>%
mutate(
doy = yday(date),
dec_time = decimal_date(date),
yr = year(date),
mo = month(date, label = T)
) %>%
left_join(fl_dat, by = 'date') %>%
mutate(
flo = log(qsm),
lnchl = log(chl)
) %>%
select(-q, -qsm, -station, -Latitude, -Longitude, -Location)
# plot, all
p <- ggplot(sf_sub, aes(x = date, y = lnchl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_sub, aes(x = yr, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_sub, aes(x = mo, y = lnchl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
Some simple GAMs to explore annual, seasonal trends.
# annual only
mod1 <- gam(lnchl ~ s(dec_time, bs = 'tp'),
data = sf_sub,
na.action = na.exclude
)
# seasonal only
mod2 <- gam(lnchl ~ s(doy, bs = 'cc'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# annual and seasonal, additive
mod3 <- gam(lnchl ~ s(dec_time, bs = 'tp') +
s(doy, bs = 'cc'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# annual and seasonal, interaction
mod4 <- gam(lnchl ~ te(dec_time, doy, bs = c('tp', 'cc')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
Summary stats of annual, seasonal models:
mods <- list(
mod1 = mod1,
mod2 = mod2,
mod3 = mod3,
mod4 = mod4
)
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| name | smoother | edf | Ref.df | F | p.value |
|---|---|---|---|---|---|
| mod1 | s(dec_time) | 8.212548 | 8.830184 | 32.55938 | 0 |
| mod2 | s(doy) | 7.083832 | 8.000000 | 16.06665 | 0 |
| mod3 | s(dec_time) | 8.394141 | 8.896837 | 41.39759 | 0 |
| mod3 | s(doy) | 7.393221 | 8.000000 | 24.13289 | 0 |
| mod4 | te(dec_time,doy) | 16.859549 | 18.456279 | 25.60994 | 0 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| name | AIC | R2 |
|---|---|---|
| mod1 | 1188.287 | 0.3406925 |
| mod2 | 1304.711 | 0.1845578 |
| mod3 | 1030.524 | 0.5109149 |
| mod4 | 1083.738 | 0.4625216 |
# prediction data
pred_dat <- data.frame(
dec_time = seq(min(sf_sub$dec_time), max(sf_sub$dec_time), length = 1000)
) %>%
mutate(
date = date_decimal(dec_time),
date = as.Date(date),
mo = month(date, label = TRUE),
doy = yday(date),
yr = year(date)
) %>%
left_join(., fl_dat[, c('date', 'qsm')]) %>%
mutate(flo = log(qsm)) %>%
select(-qsm)
# predictions
sf_res <- pred_dat %>%
mutate(
mod1 = predict(mod1, newdata = pred_dat),
mod2 = predict(mod2, newdata = pred_dat),
mod3 = predict(mod3, newdata = pred_dat),
mod4 = predict(mod4, newdata = pred_dat)
) %>%
tidyr::gather('mods', 'pred', -date, -mo, -doy, -dec_time, -yr, -flo)
# plot
p <- ggplot(sf_res, aes(x = date)) +
geom_point(data = sf_sub, aes(y = lnchl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sf_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
Adding flow data to the model:
# model, all terms additive
mod5 <- gam(lnchl ~ s(dec_time, bs = 'tp') + s(doy, bs = 'cc') + s(flo, bs = 'tp'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# model, all terms additive
mod6 <- gam(lnchl ~ te(dec_time, doy, bs = c('tp', 'cc')) + s(flo, bs = 'tp'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# model, all terms additive
mod7 <- gam(lnchl ~ te(dec_time, flo, bs = c('tp', 'tp')) + s(doy, bs = 'cc'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# model, all terms additive
mod8 <- gam(lnchl ~ te(flo, doy, bs = c('tp', 'cc')) + s(dec_time, bs = 'tp'),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# model, all terms additive
mod9 <- gam(lnchl ~ te(flo, doy, bs = c('tp', 'cc')) + te(flo, dec_time, bs = c('tp', 'tp')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
# model using a tensor product smooth for interactions
mod10 <- gam(lnchl ~ te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
Summary stats of annual, seasonal, flow models:
mods2 <- list(
mod5 = mod5,
mod6 = mod6,
mod7 = mod7,
mod8 = mod8,
mod9 = mod9,
mod10 = mod10
)
# smoother stats of GAMs
map(mods2, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| name | smoother | edf | Ref.df | F | p.value |
|---|---|---|---|---|---|
| mod5 | s(dec_time) | 8.443642 | 8.910357 | 41.055072 | 0.0000000 |
| mod5 | s(doy) | 7.410168 | 8.000000 | 23.576338 | 0.0000000 |
| mod5 | s(flo) | 4.927544 | 6.039680 | 2.104444 | 0.0484013 |
| mod6 | te(dec_time,doy) | 16.848322 | 18.443527 | 24.487013 | 0.0000000 |
| mod6 | s(flo) | 4.994615 | 6.104263 | 1.428754 | 0.1893507 |
| mod7 | te(dec_time,flo) | 17.179510 | 18.932122 | 17.608032 | 0.0000000 |
| mod7 | s(doy) | 7.370335 | 8.000000 | 23.899560 | 0.0000000 |
| mod8 | te(flo,doy) | 12.456261 | 14.898789 | 13.692255 | 0.0000000 |
| mod8 | s(dec_time) | 8.546913 | 8.935920 | 41.075520 | 0.0000000 |
| mod9 | te(flo,doy) | 12.868088 | 15.106326 | 12.703471 | 0.0000000 |
| mod9 | te(flo,dec_time) | 13.179584 | 20.000000 | 14.760514 | 0.0000000 |
| mod10 | te(dec_time,doy,flo) | 42.876219 | 53.581394 | 10.425867 | 0.0000000 |
# summary stats of GAMs
map(mods2, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| name | AIC | R2 |
|---|---|---|
| mod5 | 1023.706 | 0.5210944 |
| mod6 | 1081.051 | 0.4697431 |
| mod7 | 1060.274 | 0.4916756 |
| mod8 | 1022.771 | 0.5220878 |
| mod9 | 1059.692 | 0.4935151 |
| mod10 | 1062.573 | 0.5050973 |
# predictions
sf_res2 <- pred_dat %>%
mutate(
mod5 = predict(mod5, newdata = pred_dat),
mod6 = predict(mod6, newdata = pred_dat),
mod7 = predict(mod7, newdata = pred_dat),
mod8 = predict(mod8, newdata = pred_dat),
mod9 = predict(mod9, newdata = pred_dat),
mod10 = predict(mod10, newdata = pred_dat)
) %>%
tidyr::gather('mods', 'pred', -date, -mo, -doy, -dec_time, -yr, -flo)
# plot
p <- ggplot(sf_res2, aes(x = date)) +
geom_point(data = sf_sub, aes(y = lnchl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
ptheme <- theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
cols <- 'Spectral'
p5 <- dynagam(mod5, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('mod5')
p6 <- dynagam(mod6, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('mod6')
p7 <- dynagam(mod7, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('mod7')
p8 <- dynagam(mod8, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('mod8')
p9 <- dynagam(mod9, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('mod9')
p10 <- dynagam(mod10, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
ggtitle('mod10')
pleg <- g_legend(p10)
p10 <- p10 +
theme(legend.position = 'none')
grid.arrange(
pleg,
arrangeGrob(p5, p6, p7, p8, p9, p10, ncol = 6, bottom = 'lnQ', left = 'lnchl'),
ncol = 1,
heights = c(0.1, 1)
)
Backwards model selection, see here:
mod <- gam(lnchl ~ s(dec_time, bs = 'tp') +
s(doy, bs = 'cc') +
s(flo, bs = 'tp') +
te(flo, doy, bs = c('tp', 'cc')) +
te(flo, dec_time, bs = c('tp', 'tp')) +
te(dec_time, doy, bs = c('tp', 'cc')) +
te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
summary(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## lnchl ~ s(dec_time, bs = "tp") + s(doy, bs = "cc") + s(flo, bs = "tp") +
## te(flo, doy, bs = c("tp", "cc")) + te(flo, dec_time, bs = c("tp",
## "tp")) + te(dec_time, doy, bs = c("tp", "cc")) + te(dec_time,
## doy, flo, bs = c("tp", "cc", "tp"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.61702 0.02385 67.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dec_time) 8.1368 8.73 8.167 3.83e-11 ***
## s(doy) 6.6482 8.00 3.404 4.93e-05 ***
## s(flo) 4.8183 5.85 1.352 0.30477
## te(flo,doy) 15.0000 15.00 2.191 2.80e-05 ***
## te(flo,dec_time) 6.0911 16.00 0.716 0.00603 **
## te(dec_time,doy) 0.8695 15.00 0.051 0.18557
## te(dec_time,doy,flo) 11.6894 48.00 0.488 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.578 Deviance explained = 61.9%
## GCV = 0.34885 Scale est. = 0.31463 n = 553
AIC(mod)
## [1] 983.2766
mod <- gam(lnchl ~ s(dec_time, bs = 'tp') +
s(doy, bs = 'cc') +
te(flo, doy, bs = c('tp', 'cc')) +
te(flo, dec_time, bs = c('tp', 'tp')) +
te(dec_time, doy, bs = c('tp', 'cc')) +
te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
summary(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## lnchl ~ s(dec_time, bs = "tp") + s(doy, bs = "cc") + te(flo,
## doy, bs = c("tp", "cc")) + te(flo, dec_time, bs = c("tp",
## "tp")) + te(dec_time, doy, bs = c("tp", "cc")) + te(dec_time,
## doy, flo, bs = c("tp", "cc", "tp"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6170 0.0243 66.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dec_time) 8.289 8.806 9.208 1.02e-12 ***
## s(doy) 6.929 8.000 2.719 0.000229 ***
## te(flo,doy) 11.528 13.168 1.543 0.122932
## te(flo,dec_time) 7.804 16.000 0.967 0.003855 **
## te(dec_time,doy) 3.258 12.000 0.589 0.006213 **
## te(dec_time,doy,flo) 3.925 48.000 0.087 0.146334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.562 Deviance explained = 59.6%
## GCV = 0.35391 Scale est. = 0.32656 n = 553
AIC(mod)
## [1] 993.4513
mod <- gam(lnchl ~ s(dec_time, bs = 'tp') +
s(doy, bs = 'cc') +
te(flo, doy, bs = c('tp', 'cc')) +
te(flo, dec_time, bs = c('tp', 'tp')) +
te(dec_time, doy, bs = c('tp', 'cc')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
summary(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## lnchl ~ s(dec_time, bs = "tp") + s(doy, bs = "cc") + te(flo,
## doy, bs = c("tp", "cc")) + te(flo, dec_time, bs = c("tp",
## "tp")) + te(dec_time, doy, bs = c("tp", "cc"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.61702 0.02439 66.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dec_time) 8.263 8.80 9.372 5.75e-13 ***
## s(doy) 6.986 8.00 2.738 0.000219 ***
## te(flo,doy) 11.493 13.19 1.610 0.080277 .
## te(flo,dec_time) 7.693 16.00 0.994 0.008297 **
## te(dec_time,doy) 3.667 12.00 0.881 0.005124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.559 Deviance explained = 58.9%
## GCV = 0.35411 Scale est. = 0.32907 n = 553
AIC(mod)
## [1] 994.3408
mod <- gam(lnchl ~ s(dec_time, bs = 'tp') +
s(doy, bs = 'cc') +
te(flo, dec_time, bs = c('tp', 'tp')) +
te(dec_time, doy, bs = c('tp', 'cc')),
knots = list(doy = c(1, 366)),
data = sf_sub,
na.action = na.exclude
)
summary(mod)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## lnchl ~ s(dec_time, bs = "tp") + s(doy, bs = "cc") + te(flo,
## dec_time, bs = c("tp", "tp")) + te(dec_time, doy, bs = c("tp",
## "cc"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.61702 0.02505 64.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(dec_time) 8.305 8.81 11.151 6.61e-16 ***
## s(doy) 7.364 8.00 12.193 < 2e-16 ***
## te(flo,dec_time) 6.293 20.00 0.773 0.00529 **
## te(dec_time,doy) 4.248 15.00 0.824 0.00324 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.535 Deviance explained = 55.7%
## GCV = 0.36508 Scale est. = 0.34712 n = 553
AIC(mod)
## [1] 1012.738